{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Blank out CUDA_VISIBLE_DEVICES so that no CUDA device is exposed to this process.\n",
    "os.environ.update({'CUDA_VISIBLE_DEVICES': ''})\n",
    "\n",
    "# Output directory; created if missing, reused as-is when it already exists.\n",
    "out = 'xlnet-large-bahasa-standard-cased'\n",
    "os.makedirs(name=out, exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import XLNetTokenizer, XLNetModel, XLNetConfig, AutoTokenizer, AutoModelWithLMHead, pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('xlnet-large-bahasa-standard-cased/spiece.model',\n",
       " 'xlnet-large-bahasa-standard-cased/special_tokens_map.json',\n",
       " 'xlnet-large-bahasa-standard-cased/added_tokens.json')"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build the tokenizer from the local 'sp10m.cased.v9.model' vocabulary file,\n",
    "# keeping the original casing (do_lower_case=False).\n",
    "tokenizer = XLNetTokenizer('sp10m.cased.v9.model', do_lower_case=False)\n",
    "# Save into the `out` directory created in the first cell instead of repeating\n",
    "# the directory name as a second string literal that could drift out of sync.\n",
    "tokenizer.save_pretrained(out)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reload the tokenizer from the files written under `out` (defined in the first\n",
    "# cell) rather than hard-coding the directory name again; the './' prefix is\n",
    "# kept so the resolved path is exactly the same as before.\n",
    "tokenizer = XLNetTokenizer.from_pretrained('./' + out, do_lower_case=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:transformers.modeling_xlnet:Loading TF weight global_step with shape []\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/lm_loss/bias with shape [32000]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/lm_loss/bias/adam_m with shape [32000]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/lm_loss/bias/adam_v with shape [32000]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/layer_1/bias with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/layer_1/bias/adam_m with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/layer_1/bias/adam_v with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/layer_1/kernel with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/layer_1/kernel/adam_m with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/layer_1/kernel/adam_v with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/layer_2/bias with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/layer_2/bias/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/layer_2/bias/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/layer_2/kernel with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/layer_2/kernel/adam_m with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/ff/layer_2/kernel/adam_v with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/k/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/k/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/k/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/o/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/o/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/o/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/q/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/q/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/q/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/r/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/r/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/r/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/v/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/v/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_0/rel_attn/v/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/layer_1/bias with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/layer_1/bias/adam_m with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/layer_1/bias/adam_v with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/layer_1/kernel with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/layer_1/kernel/adam_m with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/layer_1/kernel/adam_v with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/layer_2/bias with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/layer_2/bias/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/layer_2/bias/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/layer_2/kernel with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/layer_2/kernel/adam_m with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/ff/layer_2/kernel/adam_v with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/k/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/k/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/k/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/o/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/o/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/o/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/q/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/q/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/q/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/r/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/r/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/r/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/v/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/v/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_1/rel_attn/v/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/layer_1/bias with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/layer_1/bias/adam_m with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/layer_1/bias/adam_v with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/layer_1/kernel with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/layer_1/kernel/adam_m with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/layer_1/kernel/adam_v with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/layer_2/bias with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/layer_2/bias/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/layer_2/bias/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/layer_2/kernel with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/layer_2/kernel/adam_m with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/ff/layer_2/kernel/adam_v with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/k/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/k/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/k/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/o/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/o/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/o/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/q/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/q/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/q/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/r/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/r/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/r/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/v/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/v/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_10/rel_attn/v/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/layer_1/bias with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/layer_1/bias/adam_m with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/layer_1/bias/adam_v with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/layer_1/kernel with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/layer_1/kernel/adam_m with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/layer_1/kernel/adam_v with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/layer_2/bias with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/layer_2/bias/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/layer_2/bias/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/layer_2/kernel with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/layer_2/kernel/adam_m with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/ff/layer_2/kernel/adam_v with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/k/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/k/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/k/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/o/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/o/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/o/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/q/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/q/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/q/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/r/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/r/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/r/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/v/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/v/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_11/rel_attn/v/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/ff/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/ff/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/ff/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/ff/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/ff/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/ff/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/ff/layer_1/bias with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/ff/layer_1/bias/adam_m with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/ff/layer_1/bias/adam_v with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/ff/layer_1/kernel with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/ff/layer_1/kernel/adam_m with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/ff/layer_1/kernel/adam_v with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/ff/layer_2/bias with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/ff/layer_2/bias/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/ff/layer_2/bias/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/ff/layer_2/kernel with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/ff/layer_2/kernel/adam_m with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/ff/layer_2/kernel/adam_v with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/rel_attn/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/rel_attn/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/rel_attn/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/rel_attn/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/rel_attn/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/rel_attn/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/rel_attn/k/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/rel_attn/k/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/rel_attn/k/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/rel_attn/o/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/rel_attn/o/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/rel_attn/o/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/rel_attn/q/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/rel_attn/q/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/rel_attn/q/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/rel_attn/r/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/rel_attn/r/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/rel_attn/r/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/rel_attn/v/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/rel_attn/v/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_12/rel_attn/v/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/ff/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/ff/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/ff/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/ff/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/ff/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/ff/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/ff/layer_1/bias with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/ff/layer_1/bias/adam_m with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/ff/layer_1/bias/adam_v with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/ff/layer_1/kernel with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/ff/layer_1/kernel/adam_m with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/ff/layer_1/kernel/adam_v with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/ff/layer_2/bias with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/ff/layer_2/bias/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/ff/layer_2/bias/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/ff/layer_2/kernel with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/ff/layer_2/kernel/adam_m with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/ff/layer_2/kernel/adam_v with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/rel_attn/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/rel_attn/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/rel_attn/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/rel_attn/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/rel_attn/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/rel_attn/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/rel_attn/k/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/rel_attn/k/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/rel_attn/k/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/rel_attn/o/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/rel_attn/o/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/rel_attn/o/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/rel_attn/q/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/rel_attn/q/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/rel_attn/q/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/rel_attn/r/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/rel_attn/r/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/rel_attn/r/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/rel_attn/v/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/rel_attn/v/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_13/rel_attn/v/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/ff/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/ff/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/ff/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/ff/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/ff/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/ff/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/ff/layer_1/bias with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/ff/layer_1/bias/adam_m with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/ff/layer_1/bias/adam_v with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/ff/layer_1/kernel with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/ff/layer_1/kernel/adam_m with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/ff/layer_1/kernel/adam_v with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/ff/layer_2/bias with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/ff/layer_2/bias/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/ff/layer_2/bias/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/ff/layer_2/kernel with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/ff/layer_2/kernel/adam_m with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/ff/layer_2/kernel/adam_v with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/rel_attn/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/rel_attn/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/rel_attn/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/rel_attn/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/rel_attn/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/rel_attn/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/rel_attn/k/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/rel_attn/k/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/rel_attn/k/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/rel_attn/o/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/rel_attn/o/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/rel_attn/o/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/rel_attn/q/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/rel_attn/q/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/rel_attn/q/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/rel_attn/r/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/rel_attn/r/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/rel_attn/r/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/rel_attn/v/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/rel_attn/v/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_14/rel_attn/v/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/ff/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/ff/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/ff/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/ff/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/ff/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/ff/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/ff/layer_1/bias with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/ff/layer_1/bias/adam_m with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/ff/layer_1/bias/adam_v with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/ff/layer_1/kernel with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/ff/layer_1/kernel/adam_m with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/ff/layer_1/kernel/adam_v with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/ff/layer_2/bias with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/ff/layer_2/bias/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/ff/layer_2/bias/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/ff/layer_2/kernel with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/ff/layer_2/kernel/adam_m with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/ff/layer_2/kernel/adam_v with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/rel_attn/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/rel_attn/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/rel_attn/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/rel_attn/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/rel_attn/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/rel_attn/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/rel_attn/k/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/rel_attn/k/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/rel_attn/k/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/rel_attn/o/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/rel_attn/o/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/rel_attn/o/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/rel_attn/q/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/rel_attn/q/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/rel_attn/q/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/rel_attn/r/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/rel_attn/r/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/rel_attn/r/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/rel_attn/v/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/rel_attn/v/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_15/rel_attn/v/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/ff/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/ff/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/ff/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/ff/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/ff/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/ff/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/ff/layer_1/bias with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/ff/layer_1/bias/adam_m with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/ff/layer_1/bias/adam_v with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/ff/layer_1/kernel with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/ff/layer_1/kernel/adam_m with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/ff/layer_1/kernel/adam_v with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/ff/layer_2/bias with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/ff/layer_2/bias/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/ff/layer_2/bias/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/ff/layer_2/kernel with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/ff/layer_2/kernel/adam_m with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/ff/layer_2/kernel/adam_v with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/rel_attn/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/rel_attn/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/rel_attn/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/rel_attn/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/rel_attn/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/rel_attn/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/rel_attn/k/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/rel_attn/k/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/rel_attn/k/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/rel_attn/o/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/rel_attn/o/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/rel_attn/o/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/rel_attn/q/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/rel_attn/q/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/rel_attn/q/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/rel_attn/r/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/rel_attn/r/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/rel_attn/r/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/rel_attn/v/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/rel_attn/v/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_16/rel_attn/v/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/ff/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/ff/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/ff/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/ff/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/ff/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/ff/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/ff/layer_1/bias with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/ff/layer_1/bias/adam_m with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/ff/layer_1/bias/adam_v with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/ff/layer_1/kernel with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/ff/layer_1/kernel/adam_m with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/ff/layer_1/kernel/adam_v with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/ff/layer_2/bias with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/ff/layer_2/bias/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/ff/layer_2/bias/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/ff/layer_2/kernel with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/ff/layer_2/kernel/adam_m with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/ff/layer_2/kernel/adam_v with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/rel_attn/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/rel_attn/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/rel_attn/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/rel_attn/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/rel_attn/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/rel_attn/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/rel_attn/k/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/rel_attn/k/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/rel_attn/k/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/rel_attn/o/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/rel_attn/o/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/rel_attn/o/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/rel_attn/q/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/rel_attn/q/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/rel_attn/q/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/rel_attn/r/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/rel_attn/r/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/rel_attn/r/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/rel_attn/v/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/rel_attn/v/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_17/rel_attn/v/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/ff/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/ff/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/ff/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/ff/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/ff/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/ff/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/ff/layer_1/bias with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/ff/layer_1/bias/adam_m with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/ff/layer_1/bias/adam_v with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/ff/layer_1/kernel with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/ff/layer_1/kernel/adam_m with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/ff/layer_1/kernel/adam_v with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/ff/layer_2/bias with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/ff/layer_2/bias/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/ff/layer_2/bias/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/ff/layer_2/kernel with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/ff/layer_2/kernel/adam_m with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/ff/layer_2/kernel/adam_v with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/rel_attn/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/rel_attn/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/rel_attn/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/rel_attn/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/rel_attn/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/rel_attn/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/rel_attn/k/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/rel_attn/k/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/rel_attn/k/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/rel_attn/o/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/rel_attn/o/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/rel_attn/o/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/rel_attn/q/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/rel_attn/q/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/rel_attn/q/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/rel_attn/r/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/rel_attn/r/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/rel_attn/r/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/rel_attn/v/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/rel_attn/v/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_18/rel_attn/v/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/ff/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/ff/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/ff/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/ff/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/ff/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/ff/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/ff/layer_1/bias with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/ff/layer_1/bias/adam_m with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/ff/layer_1/bias/adam_v with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/ff/layer_1/kernel with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/ff/layer_1/kernel/adam_m with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/ff/layer_1/kernel/adam_v with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/ff/layer_2/bias with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/ff/layer_2/bias/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/ff/layer_2/bias/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/ff/layer_2/kernel with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/ff/layer_2/kernel/adam_m with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/ff/layer_2/kernel/adam_v with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/rel_attn/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/rel_attn/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/rel_attn/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/rel_attn/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/rel_attn/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/rel_attn/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/rel_attn/k/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/rel_attn/k/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/rel_attn/k/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/rel_attn/o/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/rel_attn/o/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/rel_attn/o/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/rel_attn/q/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/rel_attn/q/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/rel_attn/q/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/rel_attn/r/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/rel_attn/r/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/rel_attn/r/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/rel_attn/v/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/rel_attn/v/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_19/rel_attn/v/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/layer_1/bias with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/layer_1/bias/adam_m with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/layer_1/bias/adam_v with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/layer_1/kernel with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/layer_1/kernel/adam_m with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/layer_1/kernel/adam_v with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/layer_2/bias with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/layer_2/bias/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/layer_2/bias/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/layer_2/kernel with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/layer_2/kernel/adam_m with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/ff/layer_2/kernel/adam_v with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/k/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/k/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/k/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/o/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/o/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/o/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/q/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/q/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/q/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/r/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/r/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/r/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/v/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/v/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_2/rel_attn/v/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/layer_1/bias with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/layer_1/bias/adam_m with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/layer_1/bias/adam_v with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/layer_1/kernel with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/layer_1/kernel/adam_m with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/layer_1/kernel/adam_v with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/layer_2/bias with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/layer_2/bias/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/layer_2/bias/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/layer_2/kernel with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/layer_2/kernel/adam_m with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/ff/layer_2/kernel/adam_v with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/k/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/k/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/k/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/o/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/o/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/o/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/q/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/q/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/q/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/r/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/r/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/r/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/v/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/v/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_3/rel_attn/v/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/layer_1/bias with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/layer_1/bias/adam_m with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/layer_1/bias/adam_v with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/layer_1/kernel with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/layer_1/kernel/adam_m with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/layer_1/kernel/adam_v with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/layer_2/bias with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/layer_2/bias/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/layer_2/bias/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/layer_2/kernel with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/layer_2/kernel/adam_m with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/ff/layer_2/kernel/adam_v with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/k/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/k/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/k/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/o/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/o/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/o/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/q/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/q/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/q/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/r/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/r/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/r/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/v/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/v/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_4/rel_attn/v/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/layer_1/bias with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/layer_1/bias/adam_m with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/layer_1/bias/adam_v with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/layer_1/kernel with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/layer_1/kernel/adam_m with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/layer_1/kernel/adam_v with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/layer_2/bias with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/layer_2/bias/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/layer_2/bias/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/layer_2/kernel with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/layer_2/kernel/adam_m with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/ff/layer_2/kernel/adam_v with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/k/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/k/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/k/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/o/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/o/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/o/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/q/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/q/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/q/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/r/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/r/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/r/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/v/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/v/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_5/rel_attn/v/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/layer_1/bias with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/layer_1/bias/adam_m with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/layer_1/bias/adam_v with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/layer_1/kernel with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/layer_1/kernel/adam_m with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/layer_1/kernel/adam_v with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/layer_2/bias with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/layer_2/bias/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/layer_2/bias/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/layer_2/kernel with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/layer_2/kernel/adam_m with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/ff/layer_2/kernel/adam_v with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/k/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/k/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/k/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/o/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/o/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/o/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/q/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/q/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/q/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/r/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/r/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/r/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/v/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/v/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_6/rel_attn/v/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/layer_1/bias with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/layer_1/bias/adam_m with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/layer_1/bias/adam_v with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/layer_1/kernel with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/layer_1/kernel/adam_m with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/layer_1/kernel/adam_v with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/layer_2/bias with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/layer_2/bias/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/layer_2/bias/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/layer_2/kernel with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/layer_2/kernel/adam_m with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/ff/layer_2/kernel/adam_v with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/k/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/k/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/k/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/o/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/o/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/o/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/q/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/q/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/q/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/r/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/r/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/r/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/v/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/v/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_7/rel_attn/v/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/layer_1/bias with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/layer_1/bias/adam_m with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/layer_1/bias/adam_v with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/layer_1/kernel with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/layer_1/kernel/adam_m with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/layer_1/kernel/adam_v with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/layer_2/bias with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/layer_2/bias/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/layer_2/bias/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/layer_2/kernel with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/layer_2/kernel/adam_m with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/ff/layer_2/kernel/adam_v with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/k/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/k/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/k/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/o/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/o/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/o/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/q/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/q/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/q/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/r/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/r/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/r/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/v/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/v/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_8/rel_attn/v/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/layer_1/bias with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/layer_1/bias/adam_m with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/layer_1/bias/adam_v with shape [4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/layer_1/kernel with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/layer_1/kernel/adam_m with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/layer_1/kernel/adam_v with shape [1024, 4096]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/layer_2/bias with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/layer_2/bias/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/layer_2/bias/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/layer_2/kernel with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/layer_2/kernel/adam_m with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/ff/layer_2/kernel/adam_v with shape [4096, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/LayerNorm/beta/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/LayerNorm/beta/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/LayerNorm/gamma/adam_m with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/LayerNorm/gamma/adam_v with shape [1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/k/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/k/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/k/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/o/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/o/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/o/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/q/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/q/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/q/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/r/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/r/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/r/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/v/kernel with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/v/kernel/adam_m with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/layer_9/rel_attn/v/kernel/adam_v with shape [1024, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/mask_emb/mask_emb with shape [1, 1, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/mask_emb/mask_emb/adam_m with shape [1, 1, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/mask_emb/mask_emb/adam_v with shape [1, 1, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/r_r_bias with shape [20, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/r_r_bias/adam_m with shape [20, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/r_r_bias/adam_v with shape [20, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/r_s_bias with shape [20, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/r_s_bias/adam_m with shape [20, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/r_s_bias/adam_v with shape [20, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/r_w_bias with shape [20, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/r_w_bias/adam_m with shape [20, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/r_w_bias/adam_v with shape [20, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/seg_embed with shape [20, 2, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/seg_embed/adam_m with shape [20, 2, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/seg_embed/adam_v with shape [20, 2, 16, 64]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/word_embedding/lookup_table with shape [32000, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/word_embedding/lookup_table/adam_m with shape [32000, 1024]\n",
      "INFO:transformers.modeling_xlnet:Loading TF weight model/transformer/word_embedding/lookup_table/adam_v with shape [32000, 1024]\n",
      "INFO:transformers.modeling_xlnet:Importing model/lm_loss/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/lm_loss/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/word_embedding/lookup_table\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/word_embedding/lookup_table\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/mask_emb/mask_emb\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/mask_emb/mask_emb\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_0/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_0/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_0/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_0/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_0/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_0/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_0/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_0/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_0/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_0/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_0/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_0/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_0/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_0/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_0/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_0/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_0/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_0/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_0/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_0/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_0/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_0/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_0/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_0/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_0/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_0/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_1/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_1/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_1/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_1/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_1/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_1/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_1/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_1/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_1/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_1/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_1/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_1/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_1/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_1/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_1/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_1/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_1/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_1/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_1/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_1/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_1/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_1/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_1/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_1/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_1/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_1/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_2/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_2/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_2/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_2/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_2/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_2/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_2/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_2/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_2/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_2/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_2/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_2/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_2/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_2/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_2/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_2/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_2/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_2/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_2/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_2/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_2/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_2/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_2/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_2/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_2/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_2/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_3/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_3/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_3/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_3/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_3/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_3/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_3/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_3/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_3/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_3/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_3/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_3/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_3/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_3/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_3/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_3/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_3/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_3/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_3/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_3/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_3/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_3/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_3/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_3/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_3/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_3/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_4/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_4/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_4/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_4/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_4/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_4/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_4/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_4/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_4/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_4/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_4/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_4/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_4/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_4/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_4/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_4/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_4/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_4/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_4/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_4/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_4/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_4/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_4/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_4/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_4/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_4/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_5/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_5/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_5/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_5/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_5/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_5/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_5/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_5/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_5/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_5/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_5/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_5/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_5/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_5/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_5/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_5/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_5/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_5/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_5/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_5/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_5/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_5/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_5/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_5/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_5/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_5/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_6/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_6/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_6/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_6/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_6/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_6/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_6/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_6/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_6/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_6/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_6/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_6/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_6/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_6/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_6/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_6/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_6/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_6/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_6/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_6/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_6/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_6/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_6/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_6/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_6/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_6/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_7/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_7/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_7/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_7/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_7/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_7/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_7/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_7/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_7/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_7/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_7/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_7/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_7/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_7/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_7/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_7/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_7/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_7/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_7/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_7/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_7/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_7/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_7/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_7/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_7/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_7/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_8/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_8/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_8/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_8/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_8/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_8/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_8/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_8/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_8/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_8/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_8/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_8/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_8/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_8/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_8/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_8/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_8/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_8/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_8/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_8/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_8/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_8/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_8/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_8/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_8/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_8/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_9/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_9/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_9/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_9/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_9/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_9/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_9/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_9/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_9/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_9/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_9/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_9/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_9/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_9/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_9/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_9/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_9/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_9/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_9/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_9/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_9/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_9/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_9/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_9/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_9/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_9/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_10/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_10/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_10/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_10/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_10/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_10/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_10/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_10/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_10/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_10/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_10/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_10/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_10/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_10/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_10/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_10/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_10/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_10/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_10/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_10/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_10/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_10/ff/layer_1/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_10/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_10/ff/layer_2/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_10/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_10/ff/layer_2/bias\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_11/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_11/rel_attn/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_11/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_11/rel_attn/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_11/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_11/rel_attn/o/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_11/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_11/rel_attn/q/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_11/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_11/rel_attn/k/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_11/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_11/rel_attn/r/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_11/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_11/rel_attn/v/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_11/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_11/ff/LayerNorm/gamma\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_11/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_11/ff/LayerNorm/beta\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_11/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Transposing\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_11/ff/layer_1/kernel\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_11/ff/layer_1/bias\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_11/ff/layer_1/bias\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_11/ff/layer_2/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Transposing\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_11/ff/layer_2/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_11/ff/layer_2/bias\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_11/ff/layer_2/bias\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_12/rel_attn/LayerNorm/gamma\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_12/rel_attn/LayerNorm/gamma\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_12/rel_attn/LayerNorm/beta\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_12/rel_attn/LayerNorm/beta\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_12/rel_attn/o/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_12/rel_attn/o/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_12/rel_attn/q/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_12/rel_attn/q/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_12/rel_attn/k/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_12/rel_attn/k/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_12/rel_attn/r/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_12/rel_attn/r/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_12/rel_attn/v/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_12/rel_attn/v/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_12/ff/LayerNorm/gamma\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_12/ff/LayerNorm/gamma\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_12/ff/LayerNorm/beta\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_12/ff/LayerNorm/beta\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_12/ff/layer_1/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Transposing\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_12/ff/layer_1/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_12/ff/layer_1/bias\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_12/ff/layer_1/bias\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_12/ff/layer_2/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Transposing\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_12/ff/layer_2/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_12/ff/layer_2/bias\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_12/ff/layer_2/bias\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_13/rel_attn/LayerNorm/gamma\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_13/rel_attn/LayerNorm/gamma\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_13/rel_attn/LayerNorm/beta\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_13/rel_attn/LayerNorm/beta\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_13/rel_attn/o/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_13/rel_attn/o/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_13/rel_attn/q/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_13/rel_attn/q/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_13/rel_attn/k/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_13/rel_attn/k/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_13/rel_attn/r/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_13/rel_attn/r/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_13/rel_attn/v/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_13/rel_attn/v/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_13/ff/LayerNorm/gamma\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_13/ff/LayerNorm/gamma\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_13/ff/LayerNorm/beta\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_13/ff/LayerNorm/beta\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_13/ff/layer_1/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Transposing\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_13/ff/layer_1/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_13/ff/layer_1/bias\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_13/ff/layer_1/bias\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_13/ff/layer_2/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Transposing\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_13/ff/layer_2/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_13/ff/layer_2/bias\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_13/ff/layer_2/bias\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_14/rel_attn/LayerNorm/gamma\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_14/rel_attn/LayerNorm/gamma\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_14/rel_attn/LayerNorm/beta\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_14/rel_attn/LayerNorm/beta\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_14/rel_attn/o/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_14/rel_attn/o/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_14/rel_attn/q/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_14/rel_attn/q/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_14/rel_attn/k/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_14/rel_attn/k/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_14/rel_attn/r/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_14/rel_attn/r/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_14/rel_attn/v/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_14/rel_attn/v/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_14/ff/LayerNorm/gamma\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_14/ff/LayerNorm/gamma\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_14/ff/LayerNorm/beta\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_14/ff/LayerNorm/beta\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_14/ff/layer_1/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Transposing\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_14/ff/layer_1/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_14/ff/layer_1/bias\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_14/ff/layer_1/bias\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_14/ff/layer_2/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Transposing\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_14/ff/layer_2/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_14/ff/layer_2/bias\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_14/ff/layer_2/bias\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_15/rel_attn/LayerNorm/gamma\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_15/rel_attn/LayerNorm/gamma\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_15/rel_attn/LayerNorm/beta\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_15/rel_attn/LayerNorm/beta\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_15/rel_attn/o/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_15/rel_attn/o/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_15/rel_attn/q/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_15/rel_attn/q/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_15/rel_attn/k/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_15/rel_attn/k/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_15/rel_attn/r/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_15/rel_attn/r/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_15/rel_attn/v/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_15/rel_attn/v/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_15/ff/LayerNorm/gamma\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_15/ff/LayerNorm/gamma\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_15/ff/LayerNorm/beta\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_15/ff/LayerNorm/beta\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_15/ff/layer_1/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Transposing\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_15/ff/layer_1/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_15/ff/layer_1/bias\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_15/ff/layer_1/bias\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_15/ff/layer_2/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Transposing\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_15/ff/layer_2/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_15/ff/layer_2/bias\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_15/ff/layer_2/bias\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_16/rel_attn/LayerNorm/gamma\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_16/rel_attn/LayerNorm/gamma\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_16/rel_attn/LayerNorm/beta\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_16/rel_attn/LayerNorm/beta\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_16/rel_attn/o/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_16/rel_attn/o/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_16/rel_attn/q/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_16/rel_attn/q/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_16/rel_attn/k/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_16/rel_attn/k/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_16/rel_attn/r/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_16/rel_attn/r/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_16/rel_attn/v/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_16/rel_attn/v/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_16/ff/LayerNorm/gamma\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_16/ff/LayerNorm/gamma\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_16/ff/LayerNorm/beta\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_16/ff/LayerNorm/beta\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_16/ff/layer_1/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Transposing\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_16/ff/layer_1/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_16/ff/layer_1/bias\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_16/ff/layer_1/bias\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_16/ff/layer_2/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Transposing\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_16/ff/layer_2/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_16/ff/layer_2/bias\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_16/ff/layer_2/bias\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_17/rel_attn/LayerNorm/gamma\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_17/rel_attn/LayerNorm/gamma\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_17/rel_attn/LayerNorm/beta\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_17/rel_attn/LayerNorm/beta\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_17/rel_attn/o/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_17/rel_attn/o/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_17/rel_attn/q/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_17/rel_attn/q/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_17/rel_attn/k/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_17/rel_attn/k/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_17/rel_attn/r/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_17/rel_attn/r/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_17/rel_attn/v/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_17/rel_attn/v/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_17/ff/LayerNorm/gamma\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_17/ff/LayerNorm/gamma\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_17/ff/LayerNorm/beta\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_17/ff/LayerNorm/beta\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_17/ff/layer_1/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Transposing\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_17/ff/layer_1/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_17/ff/layer_1/bias\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_17/ff/layer_1/bias\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_17/ff/layer_2/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Transposing\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_17/ff/layer_2/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_17/ff/layer_2/bias\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_17/ff/layer_2/bias\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_18/rel_attn/LayerNorm/gamma\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_18/rel_attn/LayerNorm/gamma\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_18/rel_attn/LayerNorm/beta\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_18/rel_attn/LayerNorm/beta\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_18/rel_attn/o/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_18/rel_attn/o/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_18/rel_attn/q/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_18/rel_attn/q/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_18/rel_attn/k/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_18/rel_attn/k/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_18/rel_attn/r/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_18/rel_attn/r/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_18/rel_attn/v/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_18/rel_attn/v/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_18/ff/LayerNorm/gamma\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_18/ff/LayerNorm/gamma\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_18/ff/LayerNorm/beta\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_18/ff/LayerNorm/beta\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_18/ff/layer_1/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Transposing\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_18/ff/layer_1/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_18/ff/layer_1/bias\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_18/ff/layer_1/bias\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_18/ff/layer_2/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Transposing\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_18/ff/layer_2/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_18/ff/layer_2/bias\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_18/ff/layer_2/bias\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_19/rel_attn/LayerNorm/gamma\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_19/rel_attn/LayerNorm/gamma\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_19/rel_attn/LayerNorm/beta\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_19/rel_attn/LayerNorm/beta\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_19/rel_attn/o/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_19/rel_attn/o/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_19/rel_attn/q/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_19/rel_attn/q/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_19/rel_attn/k/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_19/rel_attn/k/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_19/rel_attn/r/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_19/rel_attn/r/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_19/rel_attn/v/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_19/rel_attn/v/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_19/ff/LayerNorm/gamma\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_19/ff/LayerNorm/gamma\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_19/ff/LayerNorm/beta\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_19/ff/LayerNorm/beta\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_19/ff/layer_1/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Transposing\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_19/ff/layer_1/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_19/ff/layer_1/bias\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_19/ff/layer_1/bias\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_19/ff/layer_2/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Transposing\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_19/ff/layer_2/kernel\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/layer_19/ff/layer_2/bias\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/layer_19/ff/layer_2/bias\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/r_r_bias\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_r_bias for layer 0\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_r_bias for layer 1\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_r_bias for layer 2\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_r_bias for layer 3\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_r_bias for layer 4\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_r_bias for layer 5\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_r_bias for layer 6\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_r_bias for layer 7\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_r_bias for layer 8\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_r_bias for layer 9\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_r_bias for layer 10\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_r_bias for layer 11\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_r_bias for layer 12\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_r_bias for layer 13\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_r_bias for layer 14\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_r_bias for layer 15\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_r_bias for layer 16\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_r_bias for layer 17\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_r_bias for layer 18\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_r_bias for layer 19\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/r_w_bias\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_w_bias for layer 0\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_w_bias for layer 1\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_w_bias for layer 2\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_w_bias for layer 3\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_w_bias for layer 4\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_w_bias for layer 5\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_w_bias for layer 6\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_w_bias for layer 7\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_w_bias for layer 8\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_w_bias for layer 9\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_w_bias for layer 10\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_w_bias for layer 11\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_w_bias for layer 12\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_w_bias for layer 13\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_w_bias for layer 14\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_w_bias for layer 15\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_w_bias for layer 16\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_w_bias for layer 17\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_w_bias for layer 18\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_w_bias for layer 19\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/r_s_bias\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_s_bias for layer 0\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_s_bias for layer 1\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_s_bias for layer 2\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_s_bias for layer 3\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_s_bias for layer 4\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_s_bias for layer 5\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_s_bias for layer 6\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_s_bias for layer 7\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_s_bias for layer 8\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_s_bias for layer 9\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_s_bias for layer 10\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_s_bias for layer 11\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_s_bias for layer 12\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_s_bias for layer 13\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_s_bias for layer 14\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_s_bias for layer 15\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_s_bias for layer 16\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_s_bias for layer 17\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_s_bias for layer 18\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/r_s_bias for layer 19\r\n",
      "INFO:transformers.modeling_xlnet:Importing model/transformer/seg_embed\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/seg_embed for layer 0\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/seg_embed for layer 1\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/seg_embed for layer 2\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/seg_embed for layer 3\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/seg_embed for layer 4\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/seg_embed for layer 5\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/seg_embed for layer 6\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/seg_embed for layer 7\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/seg_embed for layer 8\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/seg_embed for layer 9\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/seg_embed for layer 10\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/seg_embed for layer 11\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/seg_embed for layer 12\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/seg_embed for layer 13\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/seg_embed for layer 14\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/seg_embed for layer 15\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/seg_embed for layer 16\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/seg_embed for layer 17\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/seg_embed for layer 18\r\n",
      "INFO:transformers.modeling_xlnet:Initialize PyTorch weight model/transformer/seg_embed for layer 19\r\n",
      "INFO:transformers.modeling_xlnet:Weights not copied to PyTorch model: global_step, model/lm_loss/bias/adam_m, model/lm_loss/bias/adam_v, model/transformer/layer_0/ff/LayerNorm/beta/adam_m, model/transformer/layer_0/ff/LayerNorm/beta/adam_v, model/transformer/layer_0/ff/LayerNorm/gamma/adam_m, model/transformer/layer_0/ff/LayerNorm/gamma/adam_v, model/transformer/layer_0/ff/layer_1/bias/adam_m, model/transformer/layer_0/ff/layer_1/bias/adam_v, model/transformer/layer_0/ff/layer_1/kernel/adam_m, model/transformer/layer_0/ff/layer_1/kernel/adam_v, model/transformer/layer_0/ff/layer_2/bias/adam_m, model/transformer/layer_0/ff/layer_2/bias/adam_v, model/transformer/layer_0/ff/layer_2/kernel/adam_m, model/transformer/layer_0/ff/layer_2/kernel/adam_v, model/transformer/layer_0/rel_attn/LayerNorm/beta/adam_m, model/transformer/layer_0/rel_attn/LayerNorm/beta/adam_v, model/transformer/layer_0/rel_attn/LayerNorm/gamma/adam_m, model/transformer/layer_0/rel_attn/LayerNorm/gamma/adam_v, model/transformer/layer_0/rel_attn/k/kernel/adam_m, model/transformer/layer_0/rel_attn/k/kernel/adam_v, model/transformer/layer_0/rel_attn/o/kernel/adam_m, model/transformer/layer_0/rel_attn/o/kernel/adam_v, model/transformer/layer_0/rel_attn/q/kernel/adam_m, model/transformer/layer_0/rel_attn/q/kernel/adam_v, model/transformer/layer_0/rel_attn/r/kernel/adam_m, model/transformer/layer_0/rel_attn/r/kernel/adam_v, model/transformer/layer_0/rel_attn/v/kernel/adam_m, model/transformer/layer_0/rel_attn/v/kernel/adam_v, model/transformer/layer_1/ff/LayerNorm/beta/adam_m, model/transformer/layer_1/ff/LayerNorm/beta/adam_v, model/transformer/layer_1/ff/LayerNorm/gamma/adam_m, model/transformer/layer_1/ff/LayerNorm/gamma/adam_v, model/transformer/layer_1/ff/layer_1/bias/adam_m, model/transformer/layer_1/ff/layer_1/bias/adam_v, model/transformer/layer_1/ff/layer_1/kernel/adam_m, model/transformer/layer_1/ff/layer_1/kernel/adam_v, model/transformer/layer_1/ff/layer_2/bias/adam_m, 
model/transformer/layer_1/ff/layer_2/bias/adam_v, model/transformer/layer_1/ff/layer_2/kernel/adam_m, model/transformer/layer_1/ff/layer_2/kernel/adam_v, model/transformer/layer_1/rel_attn/LayerNorm/beta/adam_m, model/transformer/layer_1/rel_attn/LayerNorm/beta/adam_v, model/transformer/layer_1/rel_attn/LayerNorm/gamma/adam_m, model/transformer/layer_1/rel_attn/LayerNorm/gamma/adam_v, model/transformer/layer_1/rel_attn/k/kernel/adam_m, model/transformer/layer_1/rel_attn/k/kernel/adam_v, model/transformer/layer_1/rel_attn/o/kernel/adam_m, model/transformer/layer_1/rel_attn/o/kernel/adam_v, model/transformer/layer_1/rel_attn/q/kernel/adam_m, model/transformer/layer_1/rel_attn/q/kernel/adam_v, model/transformer/layer_1/rel_attn/r/kernel/adam_m, model/transformer/layer_1/rel_attn/r/kernel/adam_v, model/transformer/layer_1/rel_attn/v/kernel/adam_m, model/transformer/layer_1/rel_attn/v/kernel/adam_v, model/transformer/layer_10/ff/LayerNorm/beta/adam_m, model/transformer/layer_10/ff/LayerNorm/beta/adam_v, model/transformer/layer_10/ff/LayerNorm/gamma/adam_m, model/transformer/layer_10/ff/LayerNorm/gamma/adam_v, model/transformer/layer_10/ff/layer_1/bias/adam_m, model/transformer/layer_10/ff/layer_1/bias/adam_v, model/transformer/layer_10/ff/layer_1/kernel/adam_m, model/transformer/layer_10/ff/layer_1/kernel/adam_v, model/transformer/layer_10/ff/layer_2/bias/adam_m, model/transformer/layer_10/ff/layer_2/bias/adam_v, model/transformer/layer_10/ff/layer_2/kernel/adam_m, model/transformer/layer_10/ff/layer_2/kernel/adam_v, model/transformer/layer_10/rel_attn/LayerNorm/beta/adam_m, model/transformer/layer_10/rel_attn/LayerNorm/beta/adam_v, model/transformer/layer_10/rel_attn/LayerNorm/gamma/adam_m, model/transformer/layer_10/rel_attn/LayerNorm/gamma/adam_v, model/transformer/layer_10/rel_attn/k/kernel/adam_m, model/transformer/layer_10/rel_attn/k/kernel/adam_v, model/transformer/layer_10/rel_attn/o/kernel/adam_m, model/transformer/layer_10/rel_attn/o/kernel/adam_v, 
model/transformer/layer_10/rel_attn/q/kernel/adam_m, model/transformer/layer_10/rel_attn/q/kernel/adam_v, model/transformer/layer_10/rel_attn/r/kernel/adam_m, model/transformer/layer_10/rel_attn/r/kernel/adam_v, model/transformer/layer_10/rel_attn/v/kernel/adam_m, model/transformer/layer_10/rel_attn/v/kernel/adam_v, model/transformer/layer_11/ff/LayerNorm/beta/adam_m, model/transformer/layer_11/ff/LayerNorm/beta/adam_v, model/transformer/layer_11/ff/LayerNorm/gamma/adam_m, model/transformer/layer_11/ff/LayerNorm/gamma/adam_v, model/transformer/layer_11/ff/layer_1/bias/adam_m, model/transformer/layer_11/ff/layer_1/bias/adam_v, model/transformer/layer_11/ff/layer_1/kernel/adam_m, model/transformer/layer_11/ff/layer_1/kernel/adam_v, model/transformer/layer_11/ff/layer_2/bias/adam_m, model/transformer/layer_11/ff/layer_2/bias/adam_v, model/transformer/layer_11/ff/layer_2/kernel/adam_m, model/transformer/layer_11/ff/layer_2/kernel/adam_v, model/transformer/layer_11/rel_attn/LayerNorm/beta/adam_m, model/transformer/layer_11/rel_attn/LayerNorm/beta/adam_v, model/transformer/layer_11/rel_attn/LayerNorm/gamma/adam_m, model/transformer/layer_11/rel_attn/LayerNorm/gamma/adam_v, model/transformer/layer_11/rel_attn/k/kernel/adam_m, model/transformer/layer_11/rel_attn/k/kernel/adam_v, model/transformer/layer_11/rel_attn/o/kernel/adam_m, model/transformer/layer_11/rel_attn/o/kernel/adam_v, model/transformer/layer_11/rel_attn/q/kernel/adam_m, model/transformer/layer_11/rel_attn/q/kernel/adam_v, model/transformer/layer_11/rel_attn/r/kernel/adam_m, model/transformer/layer_11/rel_attn/r/kernel/adam_v, model/transformer/layer_11/rel_attn/v/kernel/adam_m, model/transformer/layer_11/rel_attn/v/kernel/adam_v, model/transformer/layer_12/ff/LayerNorm/beta/adam_m, model/transformer/layer_12/ff/LayerNorm/beta/adam_v, model/transformer/layer_12/ff/LayerNorm/gamma/adam_m, model/transformer/layer_12/ff/LayerNorm/gamma/adam_v, model/transformer/layer_12/ff/layer_1/bias/adam_m, 
model/transformer/layer_12/ff/layer_1/bias/adam_v, model/transformer/layer_12/ff/layer_1/kernel/adam_m, model/transformer/layer_12/ff/layer_1/kernel/adam_v, model/transformer/layer_12/ff/layer_2/bias/adam_m, model/transformer/layer_12/ff/layer_2/bias/adam_v, model/transformer/layer_12/ff/layer_2/kernel/adam_m, model/transformer/layer_12/ff/layer_2/kernel/adam_v, model/transformer/layer_12/rel_attn/LayerNorm/beta/adam_m, model/transformer/layer_12/rel_attn/LayerNorm/beta/adam_v, model/transformer/layer_12/rel_attn/LayerNorm/gamma/adam_m, model/transformer/layer_12/rel_attn/LayerNorm/gamma/adam_v, model/transformer/layer_12/rel_attn/k/kernel/adam_m, model/transformer/layer_12/rel_attn/k/kernel/adam_v, model/transformer/layer_12/rel_attn/o/kernel/adam_m, model/transformer/layer_12/rel_attn/o/kernel/adam_v, model/transformer/layer_12/rel_attn/q/kernel/adam_m, model/transformer/layer_12/rel_attn/q/kernel/adam_v, model/transformer/layer_12/rel_attn/r/kernel/adam_m, model/transformer/layer_12/rel_attn/r/kernel/adam_v, model/transformer/layer_12/rel_attn/v/kernel/adam_m, model/transformer/layer_12/rel_attn/v/kernel/adam_v, model/transformer/layer_13/ff/LayerNorm/beta/adam_m, model/transformer/layer_13/ff/LayerNorm/beta/adam_v, model/transformer/layer_13/ff/LayerNorm/gamma/adam_m, model/transformer/layer_13/ff/LayerNorm/gamma/adam_v, model/transformer/layer_13/ff/layer_1/bias/adam_m, model/transformer/layer_13/ff/layer_1/bias/adam_v, model/transformer/layer_13/ff/layer_1/kernel/adam_m, model/transformer/layer_13/ff/layer_1/kernel/adam_v, model/transformer/layer_13/ff/layer_2/bias/adam_m, model/transformer/layer_13/ff/layer_2/bias/adam_v, model/transformer/layer_13/ff/layer_2/kernel/adam_m, model/transformer/layer_13/ff/layer_2/kernel/adam_v, model/transformer/layer_13/rel_attn/LayerNorm/beta/adam_m, model/transformer/layer_13/rel_attn/LayerNorm/beta/adam_v, model/transformer/layer_13/rel_attn/LayerNorm/gamma/adam_m, 
model/transformer/layer_13/rel_attn/LayerNorm/gamma/adam_v, model/transformer/layer_13/rel_attn/k/kernel/adam_m, model/transformer/layer_13/rel_attn/k/kernel/adam_v, model/transformer/layer_13/rel_attn/o/kernel/adam_m, model/transformer/layer_13/rel_attn/o/kernel/adam_v, model/transformer/layer_13/rel_attn/q/kernel/adam_m, model/transformer/layer_13/rel_attn/q/kernel/adam_v, model/transformer/layer_13/rel_attn/r/kernel/adam_m, model/transformer/layer_13/rel_attn/r/kernel/adam_v, model/transformer/layer_13/rel_attn/v/kernel/adam_m, model/transformer/layer_13/rel_attn/v/kernel/adam_v, model/transformer/layer_14/ff/LayerNorm/beta/adam_m, model/transformer/layer_14/ff/LayerNorm/beta/adam_v, model/transformer/layer_14/ff/LayerNorm/gamma/adam_m, model/transformer/layer_14/ff/LayerNorm/gamma/adam_v, model/transformer/layer_14/ff/layer_1/bias/adam_m, model/transformer/layer_14/ff/layer_1/bias/adam_v, model/transformer/layer_14/ff/layer_1/kernel/adam_m, model/transformer/layer_14/ff/layer_1/kernel/adam_v, model/transformer/layer_14/ff/layer_2/bias/adam_m, model/transformer/layer_14/ff/layer_2/bias/adam_v, model/transformer/layer_14/ff/layer_2/kernel/adam_m, model/transformer/layer_14/ff/layer_2/kernel/adam_v, model/transformer/layer_14/rel_attn/LayerNorm/beta/adam_m, model/transformer/layer_14/rel_attn/LayerNorm/beta/adam_v, model/transformer/layer_14/rel_attn/LayerNorm/gamma/adam_m, model/transformer/layer_14/rel_attn/LayerNorm/gamma/adam_v, model/transformer/layer_14/rel_attn/k/kernel/adam_m, model/transformer/layer_14/rel_attn/k/kernel/adam_v, model/transformer/layer_14/rel_attn/o/kernel/adam_m, model/transformer/layer_14/rel_attn/o/kernel/adam_v, model/transformer/layer_14/rel_attn/q/kernel/adam_m, model/transformer/layer_14/rel_attn/q/kernel/adam_v, model/transformer/layer_14/rel_attn/r/kernel/adam_m, model/transformer/layer_14/rel_attn/r/kernel/adam_v, model/transformer/layer_14/rel_attn/v/kernel/adam_m, model/transformer/layer_14/rel_attn/v/kernel/adam_v, 
model/transformer/layer_15/ff/LayerNorm/beta/adam_m, model/transformer/layer_15/ff/LayerNorm/beta/adam_v, model/transformer/layer_15/ff/LayerNorm/gamma/adam_m, model/transformer/layer_15/ff/LayerNorm/gamma/adam_v, model/transformer/layer_15/ff/layer_1/bias/adam_m, model/transformer/layer_15/ff/layer_1/bias/adam_v, model/transformer/layer_15/ff/layer_1/kernel/adam_m, model/transformer/layer_15/ff/layer_1/kernel/adam_v, model/transformer/layer_15/ff/layer_2/bias/adam_m, model/transformer/layer_15/ff/layer_2/bias/adam_v, model/transformer/layer_15/ff/layer_2/kernel/adam_m, model/transformer/layer_15/ff/layer_2/kernel/adam_v, model/transformer/layer_15/rel_attn/LayerNorm/beta/adam_m, model/transformer/layer_15/rel_attn/LayerNorm/beta/adam_v, model/transformer/layer_15/rel_attn/LayerNorm/gamma/adam_m, model/transformer/layer_15/rel_attn/LayerNorm/gamma/adam_v, model/transformer/layer_15/rel_attn/k/kernel/adam_m, model/transformer/layer_15/rel_attn/k/kernel/adam_v, model/transformer/layer_15/rel_attn/o/kernel/adam_m, model/transformer/layer_15/rel_attn/o/kernel/adam_v, model/transformer/layer_15/rel_attn/q/kernel/adam_m, model/transformer/layer_15/rel_attn/q/kernel/adam_v, model/transformer/layer_15/rel_attn/r/kernel/adam_m, model/transformer/layer_15/rel_attn/r/kernel/adam_v, model/transformer/layer_15/rel_attn/v/kernel/adam_m, model/transformer/layer_15/rel_attn/v/kernel/adam_v, model/transformer/layer_16/ff/LayerNorm/beta/adam_m, model/transformer/layer_16/ff/LayerNorm/beta/adam_v, model/transformer/layer_16/ff/LayerNorm/gamma/adam_m, model/transformer/layer_16/ff/LayerNorm/gamma/adam_v, model/transformer/layer_16/ff/layer_1/bias/adam_m, model/transformer/layer_16/ff/layer_1/bias/adam_v, model/transformer/layer_16/ff/layer_1/kernel/adam_m, model/transformer/layer_16/ff/layer_1/kernel/adam_v, model/transformer/layer_16/ff/layer_2/bias/adam_m, model/transformer/layer_16/ff/layer_2/bias/adam_v, model/transformer/layer_16/ff/layer_2/kernel/adam_m, 
model/transformer/layer_16/ff/layer_2/kernel/adam_v, model/transformer/layer_16/rel_attn/LayerNorm/beta/adam_m, model/transformer/layer_16/rel_attn/LayerNorm/beta/adam_v, model/transformer/layer_16/rel_attn/LayerNorm/gamma/adam_m, model/transformer/layer_16/rel_attn/LayerNorm/gamma/adam_v, model/transformer/layer_16/rel_attn/k/kernel/adam_m, model/transformer/layer_16/rel_attn/k/kernel/adam_v, model/transformer/layer_16/rel_attn/o/kernel/adam_m, model/transformer/layer_16/rel_attn/o/kernel/adam_v, model/transformer/layer_16/rel_attn/q/kernel/adam_m, model/transformer/layer_16/rel_attn/q/kernel/adam_v, model/transformer/layer_16/rel_attn/r/kernel/adam_m, model/transformer/layer_16/rel_attn/r/kernel/adam_v, model/transformer/layer_16/rel_attn/v/kernel/adam_m, model/transformer/layer_16/rel_attn/v/kernel/adam_v, model/transformer/layer_17/ff/LayerNorm/beta/adam_m, model/transformer/layer_17/ff/LayerNorm/beta/adam_v, model/transformer/layer_17/ff/LayerNorm/gamma/adam_m, model/transformer/layer_17/ff/LayerNorm/gamma/adam_v, model/transformer/layer_17/ff/layer_1/bias/adam_m, model/transformer/layer_17/ff/layer_1/bias/adam_v, model/transformer/layer_17/ff/layer_1/kernel/adam_m, model/transformer/layer_17/ff/layer_1/kernel/adam_v, model/transformer/layer_17/ff/layer_2/bias/adam_m, model/transformer/layer_17/ff/layer_2/bias/adam_v, model/transformer/layer_17/ff/layer_2/kernel/adam_m, model/transformer/layer_17/ff/layer_2/kernel/adam_v, model/transformer/layer_17/rel_attn/LayerNorm/beta/adam_m, model/transformer/layer_17/rel_attn/LayerNorm/beta/adam_v, model/transformer/layer_17/rel_attn/LayerNorm/gamma/adam_m, model/transformer/layer_17/rel_attn/LayerNorm/gamma/adam_v, model/transformer/layer_17/rel_attn/k/kernel/adam_m, model/transformer/layer_17/rel_attn/k/kernel/adam_v, model/transformer/layer_17/rel_attn/o/kernel/adam_m, model/transformer/layer_17/rel_attn/o/kernel/adam_v, model/transformer/layer_17/rel_attn/q/kernel/adam_m, 
model/transformer/layer_17/rel_attn/q/kernel/adam_v, model/transformer/layer_17/rel_attn/r/kernel/adam_m, model/transformer/layer_17/rel_attn/r/kernel/adam_v, model/transformer/layer_17/rel_attn/v/kernel/adam_m, model/transformer/layer_17/rel_attn/v/kernel/adam_v, model/transformer/layer_18/ff/LayerNorm/beta/adam_m, model/transformer/layer_18/ff/LayerNorm/beta/adam_v, model/transformer/layer_18/ff/LayerNorm/gamma/adam_m, model/transformer/layer_18/ff/LayerNorm/gamma/adam_v, model/transformer/layer_18/ff/layer_1/bias/adam_m, model/transformer/layer_18/ff/layer_1/bias/adam_v, model/transformer/layer_18/ff/layer_1/kernel/adam_m, model/transformer/layer_18/ff/layer_1/kernel/adam_v, model/transformer/layer_18/ff/layer_2/bias/adam_m, model/transformer/layer_18/ff/layer_2/bias/adam_v, model/transformer/layer_18/ff/layer_2/kernel/adam_m, model/transformer/layer_18/ff/layer_2/kernel/adam_v, model/transformer/layer_18/rel_attn/LayerNorm/beta/adam_m, model/transformer/layer_18/rel_attn/LayerNorm/beta/adam_v, model/transformer/layer_18/rel_attn/LayerNorm/gamma/adam_m, model/transformer/layer_18/rel_attn/LayerNorm/gamma/adam_v, model/transformer/layer_18/rel_attn/k/kernel/adam_m, model/transformer/layer_18/rel_attn/k/kernel/adam_v, model/transformer/layer_18/rel_attn/o/kernel/adam_m, model/transformer/layer_18/rel_attn/o/kernel/adam_v, model/transformer/layer_18/rel_attn/q/kernel/adam_m, model/transformer/layer_18/rel_attn/q/kernel/adam_v, model/transformer/layer_18/rel_attn/r/kernel/adam_m, model/transformer/layer_18/rel_attn/r/kernel/adam_v, model/transformer/layer_18/rel_attn/v/kernel/adam_m, model/transformer/layer_18/rel_attn/v/kernel/adam_v, model/transformer/layer_19/ff/LayerNorm/beta/adam_m, model/transformer/layer_19/ff/LayerNorm/beta/adam_v, model/transformer/layer_19/ff/LayerNorm/gamma/adam_m, model/transformer/layer_19/ff/LayerNorm/gamma/adam_v, model/transformer/layer_19/ff/layer_1/bias/adam_m, model/transformer/layer_19/ff/layer_1/bias/adam_v, 
model/transformer/layer_19/ff/layer_1/kernel/adam_m, model/transformer/layer_19/ff/layer_1/kernel/adam_v, model/transformer/layer_19/ff/layer_2/bias/adam_m, model/transformer/layer_19/ff/layer_2/bias/adam_v, model/transformer/layer_19/ff/layer_2/kernel/adam_m, model/transformer/layer_19/ff/layer_2/kernel/adam_v, model/transformer/layer_19/rel_attn/LayerNorm/beta/adam_m, model/transformer/layer_19/rel_attn/LayerNorm/beta/adam_v, model/transformer/layer_19/rel_attn/LayerNorm/gamma/adam_m, model/transformer/layer_19/rel_attn/LayerNorm/gamma/adam_v, model/transformer/layer_19/rel_attn/k/kernel/adam_m, model/transformer/layer_19/rel_attn/k/kernel/adam_v, model/transformer/layer_19/rel_attn/o/kernel/adam_m, model/transformer/layer_19/rel_attn/o/kernel/adam_v, model/transformer/layer_19/rel_attn/q/kernel/adam_m, model/transformer/layer_19/rel_attn/q/kernel/adam_v, model/transformer/layer_19/rel_attn/r/kernel/adam_m, model/transformer/layer_19/rel_attn/r/kernel/adam_v, model/transformer/layer_19/rel_attn/v/kernel/adam_m, model/transformer/layer_19/rel_attn/v/kernel/adam_v, model/transformer/layer_2/ff/LayerNorm/beta/adam_m, model/transformer/layer_2/ff/LayerNorm/beta/adam_v, model/transformer/layer_2/ff/LayerNorm/gamma/adam_m, model/transformer/layer_2/ff/LayerNorm/gamma/adam_v, model/transformer/layer_2/ff/layer_1/bias/adam_m, model/transformer/layer_2/ff/layer_1/bias/adam_v, model/transformer/layer_2/ff/layer_1/kernel/adam_m, model/transformer/layer_2/ff/layer_1/kernel/adam_v, model/transformer/layer_2/ff/layer_2/bias/adam_m, model/transformer/layer_2/ff/layer_2/bias/adam_v, model/transformer/layer_2/ff/layer_2/kernel/adam_m, model/transformer/layer_2/ff/layer_2/kernel/adam_v, model/transformer/layer_2/rel_attn/LayerNorm/beta/adam_m, model/transformer/layer_2/rel_attn/LayerNorm/beta/adam_v, model/transformer/layer_2/rel_attn/LayerNorm/gamma/adam_m, model/transformer/layer_2/rel_attn/LayerNorm/gamma/adam_v, model/transformer/layer_2/rel_attn/k/kernel/adam_m, 
model/transformer/layer_2/rel_attn/k/kernel/adam_v, model/transformer/layer_2/rel_attn/o/kernel/adam_m, model/transformer/layer_2/rel_attn/o/kernel/adam_v, model/transformer/layer_2/rel_attn/q/kernel/adam_m, model/transformer/layer_2/rel_attn/q/kernel/adam_v, model/transformer/layer_2/rel_attn/r/kernel/adam_m, model/transformer/layer_2/rel_attn/r/kernel/adam_v, model/transformer/layer_2/rel_attn/v/kernel/adam_m, model/transformer/layer_2/rel_attn/v/kernel/adam_v, model/transformer/layer_3/ff/LayerNorm/beta/adam_m, model/transformer/layer_3/ff/LayerNorm/beta/adam_v, model/transformer/layer_3/ff/LayerNorm/gamma/adam_m, model/transformer/layer_3/ff/LayerNorm/gamma/adam_v, model/transformer/layer_3/ff/layer_1/bias/adam_m, model/transformer/layer_3/ff/layer_1/bias/adam_v, model/transformer/layer_3/ff/layer_1/kernel/adam_m, model/transformer/layer_3/ff/layer_1/kernel/adam_v, model/transformer/layer_3/ff/layer_2/bias/adam_m, model/transformer/layer_3/ff/layer_2/bias/adam_v, model/transformer/layer_3/ff/layer_2/kernel/adam_m, model/transformer/layer_3/ff/layer_2/kernel/adam_v, model/transformer/layer_3/rel_attn/LayerNorm/beta/adam_m, model/transformer/layer_3/rel_attn/LayerNorm/beta/adam_v, model/transformer/layer_3/rel_attn/LayerNorm/gamma/adam_m, model/transformer/layer_3/rel_attn/LayerNorm/gamma/adam_v, model/transformer/layer_3/rel_attn/k/kernel/adam_m, model/transformer/layer_3/rel_attn/k/kernel/adam_v, model/transformer/layer_3/rel_attn/o/kernel/adam_m, model/transformer/layer_3/rel_attn/o/kernel/adam_v, model/transformer/layer_3/rel_attn/q/kernel/adam_m, model/transformer/layer_3/rel_attn/q/kernel/adam_v, model/transformer/layer_3/rel_attn/r/kernel/adam_m, model/transformer/layer_3/rel_attn/r/kernel/adam_v, model/transformer/layer_3/rel_attn/v/kernel/adam_m, model/transformer/layer_3/rel_attn/v/kernel/adam_v, model/transformer/layer_4/ff/LayerNorm/beta/adam_m, model/transformer/layer_4/ff/LayerNorm/beta/adam_v, model/transformer/layer_4/ff/LayerNorm/gamma/adam_m, 
model/transformer/layer_4/ff/LayerNorm/gamma/adam_v, model/transformer/layer_4/ff/layer_1/bias/adam_m, model/transformer/layer_4/ff/layer_1/bias/adam_v, model/transformer/layer_4/ff/layer_1/kernel/adam_m, model/transformer/layer_4/ff/layer_1/kernel/adam_v, model/transformer/layer_4/ff/layer_2/bias/adam_m, model/transformer/layer_4/ff/layer_2/bias/adam_v, model/transformer/layer_4/ff/layer_2/kernel/adam_m, model/transformer/layer_4/ff/layer_2/kernel/adam_v, model/transformer/layer_4/rel_attn/LayerNorm/beta/adam_m, model/transformer/layer_4/rel_attn/LayerNorm/beta/adam_v, model/transformer/layer_4/rel_attn/LayerNorm/gamma/adam_m, model/transformer/layer_4/rel_attn/LayerNorm/gamma/adam_v, model/transformer/layer_4/rel_attn/k/kernel/adam_m, model/transformer/layer_4/rel_attn/k/kernel/adam_v, model/transformer/layer_4/rel_attn/o/kernel/adam_m, model/transformer/layer_4/rel_attn/o/kernel/adam_v, model/transformer/layer_4/rel_attn/q/kernel/adam_m, model/transformer/layer_4/rel_attn/q/kernel/adam_v, model/transformer/layer_4/rel_attn/r/kernel/adam_m, model/transformer/layer_4/rel_attn/r/kernel/adam_v, model/transformer/layer_4/rel_attn/v/kernel/adam_m, model/transformer/layer_4/rel_attn/v/kernel/adam_v, model/transformer/layer_5/ff/LayerNorm/beta/adam_m, model/transformer/layer_5/ff/LayerNorm/beta/adam_v, model/transformer/layer_5/ff/LayerNorm/gamma/adam_m, model/transformer/layer_5/ff/LayerNorm/gamma/adam_v, model/transformer/layer_5/ff/layer_1/bias/adam_m, model/transformer/layer_5/ff/layer_1/bias/adam_v, model/transformer/layer_5/ff/layer_1/kernel/adam_m, model/transformer/layer_5/ff/layer_1/kernel/adam_v, model/transformer/layer_5/ff/layer_2/bias/adam_m, model/transformer/layer_5/ff/layer_2/bias/adam_v, model/transformer/layer_5/ff/layer_2/kernel/adam_m, model/transformer/layer_5/ff/layer_2/kernel/adam_v, model/transformer/layer_5/rel_attn/LayerNorm/beta/adam_m, model/transformer/layer_5/rel_attn/LayerNorm/beta/adam_v, 
model/transformer/layer_5/rel_attn/LayerNorm/gamma/adam_m, model/transformer/layer_5/rel_attn/LayerNorm/gamma/adam_v, model/transformer/layer_5/rel_attn/k/kernel/adam_m, model/transformer/layer_5/rel_attn/k/kernel/adam_v, model/transformer/layer_5/rel_attn/o/kernel/adam_m, model/transformer/layer_5/rel_attn/o/kernel/adam_v, model/transformer/layer_5/rel_attn/q/kernel/adam_m, model/transformer/layer_5/rel_attn/q/kernel/adam_v, model/transformer/layer_5/rel_attn/r/kernel/adam_m, model/transformer/layer_5/rel_attn/r/kernel/adam_v, model/transformer/layer_5/rel_attn/v/kernel/adam_m, model/transformer/layer_5/rel_attn/v/kernel/adam_v, model/transformer/layer_6/ff/LayerNorm/beta/adam_m, model/transformer/layer_6/ff/LayerNorm/beta/adam_v, model/transformer/layer_6/ff/LayerNorm/gamma/adam_m, model/transformer/layer_6/ff/LayerNorm/gamma/adam_v, model/transformer/layer_6/ff/layer_1/bias/adam_m, model/transformer/layer_6/ff/layer_1/bias/adam_v, model/transformer/layer_6/ff/layer_1/kernel/adam_m, model/transformer/layer_6/ff/layer_1/kernel/adam_v, model/transformer/layer_6/ff/layer_2/bias/adam_m, model/transformer/layer_6/ff/layer_2/bias/adam_v, model/transformer/layer_6/ff/layer_2/kernel/adam_m, model/transformer/layer_6/ff/layer_2/kernel/adam_v, model/transformer/layer_6/rel_attn/LayerNorm/beta/adam_m, model/transformer/layer_6/rel_attn/LayerNorm/beta/adam_v, model/transformer/layer_6/rel_attn/LayerNorm/gamma/adam_m, model/transformer/layer_6/rel_attn/LayerNorm/gamma/adam_v, model/transformer/layer_6/rel_attn/k/kernel/adam_m, model/transformer/layer_6/rel_attn/k/kernel/adam_v, model/transformer/layer_6/rel_attn/o/kernel/adam_m, model/transformer/layer_6/rel_attn/o/kernel/adam_v, model/transformer/layer_6/rel_attn/q/kernel/adam_m, model/transformer/layer_6/rel_attn/q/kernel/adam_v, model/transformer/layer_6/rel_attn/r/kernel/adam_m, model/transformer/layer_6/rel_attn/r/kernel/adam_v, model/transformer/layer_6/rel_attn/v/kernel/adam_m, 
model/transformer/layer_6/rel_attn/v/kernel/adam_v, model/transformer/layer_7/ff/LayerNorm/beta/adam_m, model/transformer/layer_7/ff/LayerNorm/beta/adam_v, model/transformer/layer_7/ff/LayerNorm/gamma/adam_m, model/transformer/layer_7/ff/LayerNorm/gamma/adam_v, model/transformer/layer_7/ff/layer_1/bias/adam_m, model/transformer/layer_7/ff/layer_1/bias/adam_v, model/transformer/layer_7/ff/layer_1/kernel/adam_m, model/transformer/layer_7/ff/layer_1/kernel/adam_v, model/transformer/layer_7/ff/layer_2/bias/adam_m, model/transformer/layer_7/ff/layer_2/bias/adam_v, model/transformer/layer_7/ff/layer_2/kernel/adam_m, model/transformer/layer_7/ff/layer_2/kernel/adam_v, model/transformer/layer_7/rel_attn/LayerNorm/beta/adam_m, model/transformer/layer_7/rel_attn/LayerNorm/beta/adam_v, model/transformer/layer_7/rel_attn/LayerNorm/gamma/adam_m, model/transformer/layer_7/rel_attn/LayerNorm/gamma/adam_v, model/transformer/layer_7/rel_attn/k/kernel/adam_m, model/transformer/layer_7/rel_attn/k/kernel/adam_v, model/transformer/layer_7/rel_attn/o/kernel/adam_m, model/transformer/layer_7/rel_attn/o/kernel/adam_v, model/transformer/layer_7/rel_attn/q/kernel/adam_m, model/transformer/layer_7/rel_attn/q/kernel/adam_v, model/transformer/layer_7/rel_attn/r/kernel/adam_m, model/transformer/layer_7/rel_attn/r/kernel/adam_v, model/transformer/layer_7/rel_attn/v/kernel/adam_m, model/transformer/layer_7/rel_attn/v/kernel/adam_v, model/transformer/layer_8/ff/LayerNorm/beta/adam_m, model/transformer/layer_8/ff/LayerNorm/beta/adam_v, model/transformer/layer_8/ff/LayerNorm/gamma/adam_m, model/transformer/layer_8/ff/LayerNorm/gamma/adam_v, model/transformer/layer_8/ff/layer_1/bias/adam_m, model/transformer/layer_8/ff/layer_1/bias/adam_v, model/transformer/layer_8/ff/layer_1/kernel/adam_m, model/transformer/layer_8/ff/layer_1/kernel/adam_v, model/transformer/layer_8/ff/layer_2/bias/adam_m, model/transformer/layer_8/ff/layer_2/bias/adam_v, model/transformer/layer_8/ff/layer_2/kernel/adam_m, 
model/transformer/layer_8/ff/layer_2/kernel/adam_v, model/transformer/layer_8/rel_attn/LayerNorm/beta/adam_m, model/transformer/layer_8/rel_attn/LayerNorm/beta/adam_v, model/transformer/layer_8/rel_attn/LayerNorm/gamma/adam_m, model/transformer/layer_8/rel_attn/LayerNorm/gamma/adam_v, model/transformer/layer_8/rel_attn/k/kernel/adam_m, model/transformer/layer_8/rel_attn/k/kernel/adam_v, model/transformer/layer_8/rel_attn/o/kernel/adam_m, model/transformer/layer_8/rel_attn/o/kernel/adam_v, model/transformer/layer_8/rel_attn/q/kernel/adam_m, model/transformer/layer_8/rel_attn/q/kernel/adam_v, model/transformer/layer_8/rel_attn/r/kernel/adam_m, model/transformer/layer_8/rel_attn/r/kernel/adam_v, model/transformer/layer_8/rel_attn/v/kernel/adam_m, model/transformer/layer_8/rel_attn/v/kernel/adam_v, model/transformer/layer_9/ff/LayerNorm/beta/adam_m, model/transformer/layer_9/ff/LayerNorm/beta/adam_v, model/transformer/layer_9/ff/LayerNorm/gamma/adam_m, model/transformer/layer_9/ff/LayerNorm/gamma/adam_v, model/transformer/layer_9/ff/layer_1/bias/adam_m, model/transformer/layer_9/ff/layer_1/bias/adam_v, model/transformer/layer_9/ff/layer_1/kernel/adam_m, model/transformer/layer_9/ff/layer_1/kernel/adam_v, model/transformer/layer_9/ff/layer_2/bias/adam_m, model/transformer/layer_9/ff/layer_2/bias/adam_v, model/transformer/layer_9/ff/layer_2/kernel/adam_m, model/transformer/layer_9/ff/layer_2/kernel/adam_v, model/transformer/layer_9/rel_attn/LayerNorm/beta/adam_m, model/transformer/layer_9/rel_attn/LayerNorm/beta/adam_v, model/transformer/layer_9/rel_attn/LayerNorm/gamma/adam_m, model/transformer/layer_9/rel_attn/LayerNorm/gamma/adam_v, model/transformer/layer_9/rel_attn/k/kernel/adam_m, model/transformer/layer_9/rel_attn/k/kernel/adam_v, model/transformer/layer_9/rel_attn/o/kernel/adam_m, model/transformer/layer_9/rel_attn/o/kernel/adam_v, model/transformer/layer_9/rel_attn/q/kernel/adam_m, model/transformer/layer_9/rel_attn/q/kernel/adam_v, 
model/transformer/layer_9/rel_attn/r/kernel/adam_m, model/transformer/layer_9/rel_attn/r/kernel/adam_v, model/transformer/layer_9/rel_attn/v/kernel/adam_m, model/transformer/layer_9/rel_attn/v/kernel/adam_v, model/transformer/mask_emb/mask_emb/adam_m, model/transformer/mask_emb/mask_emb/adam_v, model/transformer/r_r_bias/adam_m, model/transformer/r_r_bias/adam_v, model/transformer/r_s_bias/adam_m, model/transformer/r_s_bias/adam_v, model/transformer/r_w_bias/adam_m, model/transformer/r_w_bias/adam_v, model/transformer/seg_embed/adam_m, model/transformer/seg_embed/adam_v, model/transformer/word_embedding/lookup_table/adam_m, model/transformer/word_embedding/lookup_table/adam_v\r\n",
      "Save PyTorch model to /home/husein/xlnet/xlnet-large-bahasa-standard-cased/pytorch_model.bin\r\n",
      "Save configuration file to /home/husein/xlnet/xlnet-large-bahasa-standard-cased/config.json\r\n"
     ]
    }
   ],
   "source": [
    "!transformers-cli convert --model_type xlnet \\\n",
    "  --tf_checkpoint xlnet-large/model.ckpt-500000 \\\n",
    "  --config xlnet-large/xlnet-large_config.json \\\n",
    "  --pytorch_dump_output xlnet-large-bahasa-standard-cased"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "directory = 'xlnet-large-bahasa-standard-cased'\n",
    "config = XLNetConfig(f'{directory}/config.json')\n",
    "config.vocab_size = 32000\n",
    "config.d_inner = 4096\n",
    "config.d_model = 1024\n",
    "config.n_head = 16\n",
    "config.n_layer = 20"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = AutoModelWithLMHead.from_pretrained('./xlnet-large-bahasa-standard-cased/pytorch_model.bin', config = config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "fill_mask = pipeline('fill-mask', model=model, tokenizer=tokenizer)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'sequence': 'makan ayam dengan sdm<sep><cls>',\n",
       "  'score': 0.007230748888105154,\n",
       "  'token': 12829,\n",
       "  'token_str': '▁sdm'},\n",
       " {'sequence': 'makan ayam dengan Bentar<sep><cls>',\n",
       "  'score': 0.005983198527246714,\n",
       "  'token': 15544,\n",
       "  'token_str': '▁Bentar'},\n",
       " {'sequence': 'makan ayam denganج<sep><cls>',\n",
       "  'score': 0.003996539860963821,\n",
       "  'token': 13344,\n",
       "  'token_str': 'ج'},\n",
       " {'sequence': 'makan ayam dengan seperjuangan<sep><cls>',\n",
       "  'score': 0.003097530920058489,\n",
       "  'token': 28412,\n",
       "  'token_str': '▁seperjuangan'},\n",
       " {'sequence': 'makan ayam dengan GIMANA<sep><cls>',\n",
       "  'score': 0.002888893475756049,\n",
       "  'token': 25453,\n",
       "  'token_str': '▁GIMANA'}]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fill_mask('makan ayam dengan <mask>')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.save_pretrained('xlnet-large-bahasa-standard-cased')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# !transformers-cli upload ./xlnet-large-bahasa-standard-cased"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = XLNetModel.from_pretrained('huseinzol05/xlnet-base-bahasa-cased')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tokenizer = XLNetTokenizer.from_pretrained('huseinzol05/xlnet-base-bahasa-cased', do_lower_case = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "input_ids = torch.tensor([tokenizer.encode(\"husein tk suka mkan ayam\", add_special_tokens=True)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with torch.no_grad():\n",
    "    last_hidden_states = model(input_ids)[0]\n",
    "    \n",
    "last_hidden_states\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = AutoModelWithLMHead.from_pretrained('huseinzol05/xlnet-base-bahasa-cased')\n",
    "fill_mask = pipeline('fill-mask', model=model, tokenizer=tokenizer)\n",
    "fill_mask('makan ayam dengan <mask>')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
